R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

## ── Attaching packages ─────────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.2.0     ✔ purrr   0.3.2
## ✔ tibble  2.1.3     ✔ dplyr   1.0.2
## ✔ tidyr   0.8.3     ✔ stringr 1.4.0
## ✔ readr   1.3.1     ✔ forcats 0.4.0
## Warning: package 'dplyr' was built under R version 3.6.2
## ── Conflicts ────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
SIAP <- read.csv("/Users/erikaluna/R\ Studio/msc_thesis/SIAP.csv") 
one_crop <- SIAP %>% 
  filter(crop == "maiz") %>% 
  group_by(year, state) %>% 
  summarise(ag_yield = round(sum(production)/sum(harvested), digits = 2),
            ag_prod = sum(production),
            ag_planted = sum(planted),
            ag_harv = sum(harvested), 
            ag_losses = sum(losses))
## `summarise()` regrouping output by 'year' (override with `.groups` argument)
one_crop %>% 
  DT::datatable()

## Number of observations

## `summarise()` ungrouping output (override with `.groups` argument)

## Panel data A data frama with all years and all states that grow maize.

period <- tibble(rep(c(1980:2016), times = 32)) #32 states report maize production
colnames(period) <- c("year") 
states <- tibble(rep(c("aguascalientes","baja california","baja california sur",
                       "campeche", "chiapas", "chihuahua","coahuila","colima",
                       "distrito federal","durango",
                        "guanajuato", "guerrero", "hidalgo", "jalisco", 
                       "mexico", "michoacan", "morelos", "nayarit","nuevo leon", 
                       "oaxaca", "puebla", "queretaro", "quintana roo",
                        "san luis potosi", "sinaloa", "sonora", "tabasco", 
                       "tamaulipas", "tlaxcala","veracruz", "yucatan", "zacatecas"), times = 37))
colnames(states) <- c("state") 
states <- states %>% 
  arrange(state)
states_period <- cbind(states, period)

Data frame for Maize

maize <- left_join(states_period, one_crop, by=c("state", "year"))
maize <- maize %>%  
  transform(i=as.numeric(factor(state))) %>% 
  transform(t=as.numeric(factor(year))) %>% 
  group_by(year) %>% 
  arrange(state) 

maize %>% 
  DT::datatable()

## Plots ### Production

maize %>% 
  group_by(state) %>% 
  summarise(max_prod = max(ag_prod, na.rm=T),
            min_prod = min(ag_prod, na.rm=T),
            range_prod = max(ag_prod, na.rm=T) - min(ag_prod, na.rm=T),
            sd_prod = sd(ag_prod, na.rm=T),
            mean_prod = mean(ag_prod, na.rm=T),
            median_prod = median(ag_prod, na.rm=T)) %>% 
  knitr::kable()
## `summarise()` ungrouping output (override with `.groups` argument)
state max_prod min_prod range_prod sd_prod mean_prod median_prod
aguascalientes 1493778.03 120108.00 1373670.03 421743.447 752373.75 715440.57
baja california 68442.00 25.00 68417.00 16092.783 18277.56 13658.00
baja california sur 97642.00 6634.00 91008.00 27614.561 37919.32 31540.65
campeche 464714.94 12839.00 451875.94 135606.273 175362.01 133041.15
chiapas 2135550.08 983415.00 1152135.08 282788.096 1461322.34 1460524.00
chihuahua 2319376.63 238326.00 2081050.63 550629.484 1211653.85 1125302.76
coahuila 937497.77 21696.96 915800.81 193974.135 184256.92 132786.00
colima 145185.85 46313.30 98872.55 21054.258 83773.46 77459.00
distrito federal 50161.00 9515.15 40645.85 9107.056 22218.15 23383.00
durango 2773265.28 194100.43 2579164.85 530611.100 689347.17 496439.00
guanajuato 2337499.61 383021.00 1954478.61 472100.685 1033940.03 897201.00
guerrero 1428121.17 331411.00 1096710.17 269719.455 1017910.16 1038965.35
hidalgo 879327.76 233929.00 645398.76 154093.490 519376.35 513798.00
jalisco 8337075.94 2420683.00 5916392.94 1453557.271 4451739.90 4053390.00
mexico 3610125.45 1037491.00 2572634.45 574154.477 2602864.79 2600755.36
michoacan 1935286.73 606327.00 1328959.73 340786.696 1173786.27 1130893.00
morelos 122714.05 29214.00 93500.05 20841.622 85761.65 90722.50
nayarit 439237.43 103011.00 336226.43 78919.814 219908.24 208767.21
nuevo leon 880313.40 30372.90 849940.50 176381.775 181841.54 128472.00
oaxaca 851011.00 220535.00 630476.00 164203.079 607910.18 645255.35
puebla 1417706.20 522662.00 895044.20 227714.433 1005639.22 1074593.00
queretaro 1035148.30 58909.00 976239.30 321939.078 423422.56 335106.00
quintana roo 67469.92 4160.32 63309.60 16919.277 31771.94 33769.56
san luis potosi 256366.00 68751.00 187615.00 39217.713 167479.30 172358.00
sinaloa 6462324.35 64754.00 6397570.35 1910634.563 2297744.80 2449096.00
sonora 841582.00 46513.00 795069.00 179725.857 235612.83 179494.74
tabasco 181556.66 53710.00 127846.66 29035.967 106481.81 104467.40
tamaulipas 1356220.00 153360.81 1202859.19 285901.402 612883.77 557779.01
tlaxcala 733642.80 134414.00 599228.80 168570.419 441059.10 407526.00
veracruz 1360234.55 569513.00 790721.55 222325.058 967546.76 966462.62
yucatan 160737.44 8924.00 151813.44 40031.634 109519.68 120614.85
zacatecas 2689221.34 221965.00 2467256.34 508583.271 588443.59 389535.00
maize %>% 
  ggplot(aes(state, ag_prod)) +
  geom_boxplot() +
  ylab("Production (tonnes)") +
  xlab("State") +
  #scale_y_continuous(labels = comma) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust= 0.5))
## Warning: Removed 68 rows containing non-finite values (stat_boxplot).

number_obs <- maize %>% 
  group_by(state) %>% 
  summarise(obs = sum(!is.na(ag_prod)))
## `summarise()` ungrouping output (override with `.groups` argument)
maize_complete <- number_obs %>% 
  filter(obs > 34)
maize_complete
## # A tibble: 31 x 2
##    state                 obs
##    <chr>               <int>
##  1 aguascalientes         35
##  2 baja california sur    35
##  3 campeche               35
##  4 chiapas                35
##  5 chihuahua              35
##  6 coahuila               35
##  7 colima                 35
##  8 distrito federal       35
##  9 durango                35
## 10 guanajuato             35
## # … with 21 more rows
maize_ts <- maize %>% 
  ggplot(aes(year, ag_prod)) + 
  geom_line()+
  ylab("Production (tonnes)") +
  xlab("Years") +
  ggtitle("maize Production 1980 - 2016") +
  theme(axis.text.x = element_text(angle = 45, vjust = 0.5, hjust=1)) +
  geom_rect(data = subset(maize, state %in% c(maize_complete$state)), 
            fill = NA, colour = "red", xmin = -Inf,xmax = Inf,
            ymin = -Inf,ymax = Inf) +     
  facet_wrap(~state, scales="free_y", ncol=5) 
  #facet_wrap(~state, ncol=5)
maize_ts  

Yield

maize %>% 
  group_by(state) %>% 
  summarise(max_yield = max(ag_yield, na.rm=T),
            min_yield = min(ag_yield, na.rm=T),
            range_yield = max(ag_yield, na.rm=T) - min(ag_yield, na.rm=T),
            sd_yield = sd(ag_yield, na.rm=T),
            mean_yield = mean(ag_yield, na.rm=T),
            median_yield = median(ag_yield, na.rm=T)) %>% 
  knitr::kable()
## `summarise()` ungrouping output (override with `.groups` argument)
state max_yield min_yield range_yield sd_yield mean_yield median_yield
aguascalientes 39.87 2.00 37.87 7.2798377 11.8245714 10.59
baja california 38.54 0.98 37.56 8.5605573 7.2329032 3.65
baja california sur 7.41 2.64 4.77 1.3394085 5.2271429 5.24
campeche 2.74 0.58 2.16 0.5573117 1.5691429 1.56
chiapas 2.56 1.45 1.11 0.2942522 1.9654286 1.90
chihuahua 9.97 0.57 9.40 2.4569107 5.1268571 4.85
coahuila 18.86 1.07 17.79 3.7133300 5.1442857 4.61
colima 10.51 1.76 8.75 2.1389538 4.0705714 3.12
distrito federal 11.00 1.97 9.03 1.4986115 2.7760000 2.48
durango 12.23 1.14 11.09 2.4056917 3.8565714 3.48
guanajuato 5.90 1.56 4.34 1.3113085 3.1977143 2.76
guerrero 3.06 0.97 2.09 0.4969693 2.2025714 2.26
hidalgo 3.46 1.56 1.90 0.5767255 2.2722857 2.21
jalisco 10.60 3.16 7.44 2.0650197 6.1022857 5.63
mexico 6.51 2.55 3.96 1.0881238 4.3457143 4.56
michoacan 4.15 1.46 2.69 0.7386203 2.5660000 2.31
morelos 3.50 0.91 2.59 0.7066578 2.3077143 2.10
nayarit 8.78 1.91 6.87 1.6842245 3.7422857 2.95
nuevo leon 11.06 1.00 10.06 2.2887925 3.3737143 2.64
oaxaca 1.54 0.92 0.62 0.1389323 1.2651429 1.26
puebla 2.79 1.13 1.66 0.3887487 1.9354286 1.99
queretaro 15.83 0.95 14.88 3.1923000 4.7117143 3.86
quintana roo 0.99 0.26 0.73 0.1616054 0.6131429 0.60
san luis potosi 1.75 0.81 0.94 0.2563096 1.2337143 1.18
sinaloa 10.54 1.03 9.51 3.0795467 5.7425714 6.14
sonora 7.05 2.77 4.28 1.1458762 4.6897143 4.69
tabasco 2.35 1.19 1.16 0.1957906 1.6331429 1.62
tamaulipas 4.94 1.56 3.38 0.9109864 2.9554286 2.74
tlaxcala 5.66 1.69 3.97 1.3930423 3.4051429 2.69
veracruz 2.44 1.47 0.97 0.2583350 1.8357143 1.84
yucatan 1.71 0.49 1.22 0.1963811 0.8885714 0.89
zacatecas 8.98 0.74 8.24 1.7751668 2.0965714 1.34
maize %>% 
  ggplot(aes(state, ag_yield)) +
  geom_boxplot() +
  ylab("Yield (tonnes/ha)") +
  xlab("State") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust= 0.5))
## Warning: Removed 68 rows containing non-finite values (stat_boxplot).

number_obs <- maize %>% 
  group_by(state) %>% 
  summarise(obs = sum(!is.na(ag_yield)))
## `summarise()` ungrouping output (override with `.groups` argument)
maize_complete <- number_obs %>% 
  filter(obs > 34)
maize_complete
## # A tibble: 31 x 2
##    state                 obs
##    <chr>               <int>
##  1 aguascalientes         35
##  2 baja california sur    35
##  3 campeche               35
##  4 chiapas                35
##  5 chihuahua              35
##  6 coahuila               35
##  7 colima                 35
##  8 distrito federal       35
##  9 durango                35
## 10 guanajuato             35
## # … with 21 more rows
maize_ts <- maize %>% 
  ggplot(aes(year, ag_yield)) + 
  geom_line()+
  ylab("Yield (tonnes/ha)") +
  xlab("Years") +
  ggtitle("maize Yields 1980 - 2016") +
  theme(axis.text.x = element_text(angle = 45, vjust = 0.5, hjust=1)) +
  geom_rect(data = subset(maize, state %in% c(maize_complete$state)), 
            fill = NA, colour = "red", xmin = -Inf,xmax = Inf,
            ymin = -Inf,ymax = Inf) +     
  facet_wrap(~state, scales="free_y", ncol=5) 
  #facet_wrap(~state, ncol=5)
maize_ts  

Area

maize %>% 
  group_by(state) %>% 
  summarise(max_area = max(ag_harv, na.rm=T),
            min_area = min(ag_harv, na.rm=T),
            range_area = max(ag_harv, na.rm=T) - min(ag_harv, na.rm=T),
            sd_area = sd(ag_harv, na.rm=T),
            mean_area = mean(ag_harv, na.rm=T),
            median_area = median(ag_harv, na.rm=T)) %>% 
  knitr::kable()
## `summarise()` ungrouping output (override with `.groups` argument)
state max_area min_area range_area sd_area mean_area median_area
aguascalientes 107831.0 11862.00 95969.00 28937.729 68341.029 66869.0
baja california 18469.0 6.50 18462.50 4305.230 3838.984 2510.0
baja california sur 21908.0 1550.00 20358.00 6358.061 7633.316 4981.0
campeche 186293.0 22196.00 164097.00 50751.509 97657.640 104431.0
chiapas 960143.8 504332.00 455811.84 120143.243 746915.560 702700.0
chihuahua 467063.0 78410.00 388653.00 89282.790 257589.847 259377.0
coahuila 54916.0 14750.55 40165.45 11165.860 33169.454 32642.0
colima 43703.0 11920.00 31783.00 10029.121 24591.167 24109.0
distrito federal 14384.0 2160.00 12224.00 3184.903 8485.669 8136.0
durango 235521.0 94009.76 141511.24 36710.573 177580.652 181708.0
guanajuato 455055.6 147933.00 307122.58 77625.786 327466.658 336557.0
guerrero 513566.0 341752.00 171814.00 41750.341 457701.783 466473.0
hidalgo 280490.0 105952.00 174538.00 32671.741 228483.923 233995.2
jalisco 916264.0 590515.49 325748.51 68458.874 738114.661 748010.7
mexico 746816.0 360047.60 386768.40 80984.400 607378.266 605970.5
michoacan 561059.0 355787.48 205271.52 41065.500 458596.557 464983.0
morelos 57820.0 19452.00 38368.00 10091.074 38934.708 39360.0
nayarit 107741.0 43213.25 64527.75 14931.730 61620.539 57382.0
nuevo leon 94674.3 12611.80 82062.50 20017.335 52610.726 54303.0
oaxaca 598942.5 239930.00 359012.52 92999.724 473285.145 485977.3
puebla 629374.0 309824.31 319549.69 75510.784 521914.969 541182.0
queretaro 123355.9 43143.00 80212.90 23555.979 86984.931 88259.0
quintana roo 85575.0 9660.00 75915.00 20425.422 49413.715 49868.0
san luis potosi 218808.1 46456.00 172352.15 47219.335 143351.988 146118.0
sinaloa 613197.3 46119.00 567078.29 180695.804 311682.077 363936.0
sonora 172528.0 15090.00 157438.00 38179.905 51395.407 35458.0
tabasco 105023.0 30880.00 74143.00 18947.972 66039.302 67377.0
tamaulipas 504704.0 98155.25 406548.75 100911.694 213186.951 198205.0
tlaxcala 161104.0 73478.00 87626.00 18408.116 131840.288 131124.0
veracruz 647535.4 338706.00 308829.40 74599.616 522976.031 536845.0
yucatan 166785.0 13128.00 153657.00 40496.663 123131.349 139404.0
zacatecas 442043.0 101513.60 340529.40 79128.893 295477.558 297417.5
maize %>% 
  ggplot(aes(state, ag_harv)) +
  geom_boxplot() +
  ylab("Area (tonnes)") +
  xlab("State") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust= 0.5))
## Warning: Removed 67 rows containing non-finite values (stat_boxplot).

number_obs <- maize %>% 
  group_by(state) %>% 
  summarise(obs = sum(!is.na(ag_harv)))
## `summarise()` ungrouping output (override with `.groups` argument)
maize_complete <- number_obs %>% 
  filter(obs > 34)
maize_complete
## # A tibble: 31 x 2
##    state                 obs
##    <chr>               <int>
##  1 aguascalientes         35
##  2 baja california sur    35
##  3 campeche               35
##  4 chiapas                35
##  5 chihuahua              35
##  6 coahuila               35
##  7 colima                 35
##  8 distrito federal       35
##  9 durango                35
## 10 guanajuato             35
## # … with 21 more rows
maize_ts <- maize %>% 
  ggplot(aes(year, ag_harv)) + 
  geom_line()+
  ylab("Area harvested (ha)") +
  xlab("Years") +
  ggtitle("maize - Area Harvested (ha) 1980 - 2016") +
  theme(axis.text.x = element_text(angle = 45, vjust = 0.5, hjust=1)) +
  geom_rect(data = subset(maize, state %in% c(maize_complete$state)), 
            fill = NA, colour = "red", xmin = -Inf,xmax = Inf,
            ymin = -Inf,ymax = Inf) +     
  facet_wrap(~state, scales="free_y", ncol=5) 
  #facet_wrap(~state, ncol=5)
maize_ts  

Losses

maize %>% 
  group_by(state) %>% 
  summarise(max_losses = max(ag_losses, na.rm=T),
            min_losses = min(ag_losses, na.rm=T),
            range_losses = max(ag_losses, na.rm=T) - min(ag_losses, na.rm=T),
            sd_losses = sd(ag_losses, na.rm=T),
            mean_losses = mean(ag_losses, na.rm=T),
            median_losses = median(ag_losses, na.rm=T)) %>% 
  knitr::kable()
## `summarise()` ungrouping output (override with `.groups` argument)
state max_losses min_losses range_losses sd_losses mean_losses median_losses
aguascalientes 98179.0 838.0 97341.0 26988.5843 34821.7794 32473.500
baja california 3937.0 0.0 3937.0 874.2048 652.6129 327.000
baja california sur 4479.0 0.0 4479.0 758.1018 511.7162 295.750
campeche 102704.0 603.0 102101.0 24488.9392 20843.0917 11390.000
chiapas 120142.0 0.0 120142.0 30016.9511 28319.8903 19373.000
chihuahua 120423.0 0.0 120423.0 37834.4437 43595.8477 31065.000
coahuila 26530.0 4.5 26525.5 7538.9236 12510.2131 12368.000
colima 11297.0 0.0 11297.0 2560.0987 1852.5429 849.500
distrito federal 3083.0 0.0 3083.0 633.4964 334.8771 21.500
durango 116948.0 734.0 116214.0 30675.0632 29733.5677 16725.000
guanajuato 315239.0 5352.0 309887.0 70134.1731 94967.8444 68616.000
guerrero 118839.0 1971.0 116868.0 27630.4202 20315.9821 10258.250
hidalgo 147952.0 4178.9 143773.1 34477.2629 43952.7397 33819.850
jalisco 201237.0 0.0 201237.0 45142.1965 51988.8726 35475.500
mexico 284474.0 0.0 284474.0 56461.4675 31347.6224 13341.000
michoacan 134149.0 6713.0 127436.0 29750.4589 35081.6806 26622.500
morelos 36077.0 0.0 36077.0 6468.7874 2406.3403 133.000
nayarit 11254.0 0.0 11254.0 2878.9156 2337.1486 1261.000
nuevo leon 81924.0 404.0 81520.0 23709.7851 29212.2491 30140.000
oaxaca 187341.0 1010.0 186331.0 51409.7368 65867.4032 43324.500
puebla 292870.2 4240.0 288630.2 74611.8889 79630.7631 48354.000
queretaro 74808.4 0.0 74808.4 20777.9119 25499.7086 18273.000
quintana roo 64672.0 350.0 64322.0 19000.7413 21795.2380 17931.000
san luis potosi 152662.2 37590.0 115072.2 35708.2068 90717.3109 95791.500
sinaloa 416497.8 5046.0 411451.8 71139.0756 47302.9776 27423.500
sonora 24422.0 0.0 24422.0 5689.9027 4264.8015 2649.000
tabasco 34128.0 242.0 33886.0 8200.7811 8798.5000 5850.625
tamaulipas 88012.0 8466.0 79546.0 21456.4533 38187.3582 34609.150
tlaxcala 71459.0 0.0 71459.0 15066.7309 8802.8491 1498.000
veracruz 117580.0 5149.0 112431.0 29292.9785 54707.6020 49240.000
yucatan 171644.0 0.0 171644.0 45117.3412 32991.3453 13879.500
zacatecas 180285.0 1773.0 178512.0 53379.0982 68336.0388 69056.000
maize %>% 
  ggplot(aes(state, ag_losses)) +
  geom_boxplot() +
  ylab("Losses (ha)") +
  xlab("State") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust= 0.5))
## Warning: Removed 83 rows containing non-finite values (stat_boxplot).

number_obs <- maize %>% 
  group_by(state) %>% 
  summarise(obs = sum(!is.na(ag_losses)))
## `summarise()` ungrouping output (override with `.groups` argument)
maize_complete <- number_obs %>% 
  filter(obs > 34)
maize_complete
## # A tibble: 16 x 2
##    state              obs
##    <chr>            <int>
##  1 campeche            35
##  2 chiapas             35
##  3 chihuahua           35
##  4 coahuila            35
##  5 colima              35
##  6 distrito federal    35
##  7 durango             35
##  8 hidalgo             35
##  9 morelos             35
## 10 nayarit             35
## 11 nuevo leon          35
## 12 puebla              35
## 13 queretaro           35
## 14 quintana roo        35
## 15 tlaxcala            35
## 16 veracruz            35
maize_ts <- maize %>% 
  ggplot(aes(year, ag_losses)) + 
  geom_line()+
  ylab("Losses (ha)") +
  xlab("Years") +
  ggtitle("maize - Losses (planted-harvested) (ha) 1980 - 2016") +
  theme(axis.text.x = element_text(angle = 45, vjust = 0.5, hjust=1)) +
  geom_rect(data = subset(maize, state %in% c(maize_complete$state)), 
            fill = NA, colour = "red", xmin = -Inf,xmax = Inf,
            ymin = -Inf,ymax = Inf) +     
  facet_wrap(~state, scales="free_y", ncol=5) 
  #facet_wrap(~state, ncol=5)
maize_ts